{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# `profile_columns`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "tags": [
     "hide_input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-4f3ad6c339624cf6b114074dccc6d1d4.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-4f3ad6c339624cf6b114074dccc6d1d4.vega-embed details,\n",
       "  #altair-viz-4f3ad6c339624cf6b114074dccc6d1d4.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-4f3ad6c339624cf6b114074dccc6d1d4\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-4f3ad6c339624cf6b114074dccc6d1d4\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-4f3ad6c339624cf6b114074dccc6d1d4\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"vconcat\": [{\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.0, \"value_count\": 1, \"group_name\": \"_unique_id_\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 50578.0, \"distinct_value_count\": 50578}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 1, \"group_name\": \"_unique_id_\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 50578.0, \"distinct_value_count\": 50578}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column \\\"unique_id\\\"\", \"subtitle\": \"In this col, 0 values (0.0%) are null and there are 50578 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q2296770-2\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q1443188-2\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q1443188-3\", 
\"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q90404618-5\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q21464185-4\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 5 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q2296770-2\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q1443188-2\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q1443188-3\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q90404618-5\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}, {\"value_count\": 1, \"group_name\": \"_unique_id_\", \"value\": \"Q21464185-4\", \"total_non_null_rows\": 50578, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 50578}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": 
\"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 1]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 5 values by value count\"}]}, {\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.9983766078948975, \"percentile_inc_nulls\": 0.9983787536621094, \"value_count\": 82, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 82.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9968719482421875, \"percentile_inc_nulls\": 0.9968761205673218, \"value_count\": 76, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 76.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9956445097923279, \"percentile_inc_nulls\": 0.9956502914428711, \"value_count\": 62, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 62.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9946348071098328, \"percentile_inc_nulls\": 0.9946419596672058, \"value_count\": 51, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 51.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9936845302581787, \"percentile_inc_nulls\": 0.993692934513092, \"value_count\": 48, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 48.0, \"distinct_value_count\": 25573}, 
{\"percentile_ex_nulls\": 0.9928728342056274, \"percentile_inc_nulls\": 0.992882251739502, \"value_count\": 41, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 41.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.992100715637207, \"percentile_inc_nulls\": 0.9921112060546875, \"value_count\": 39, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 39.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9898437857627869, \"percentile_inc_nulls\": 0.9898572564125061, \"value_count\": 38, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 114.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9891310930252075, \"percentile_inc_nulls\": 0.9891454577445984, \"value_count\": 36, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 36.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9884381890296936, \"percentile_inc_nulls\": 0.9884535074234009, \"value_count\": 35, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 35.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9877848625183105, \"percentile_inc_nulls\": 0.9878010153770447, \"value_count\": 33, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 33.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9872106909751892, \"percentile_inc_nulls\": 0.9872276782989502, \"value_count\": 29, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 29.0, 
\"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9862208366394043, \"percentile_inc_nulls\": 0.9862390756607056, \"value_count\": 25, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 50.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9857654571533203, \"percentile_inc_nulls\": 0.9857843518257141, \"value_count\": 23, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 23.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9832709431648254, \"percentile_inc_nulls\": 0.9832931160926819, \"value_count\": 21, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 126.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9828749895095825, \"percentile_inc_nulls\": 0.9828976988792419, \"value_count\": 20, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 20.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9817465543746948, \"percentile_inc_nulls\": 0.9817707538604736, \"value_count\": 19, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 57.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9803211092948914, \"percentile_inc_nulls\": 0.9803471565246582, \"value_count\": 18, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 72.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9796479940414429, \"percentile_inc_nulls\": 0.9796749353408813, \"value_count\": 17, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 34.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9783809185028076, \"percentile_inc_nulls\": 0.9784095883369446, \"value_count\": 16, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 64.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9760051965713501, \"percentile_inc_nulls\": 0.9760370254516602, \"value_count\": 15, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 120.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9732335805892944, \"percentile_inc_nulls\": 0.973268985748291, \"value_count\": 14, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 140.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9688582420349121, \"percentile_inc_nulls\": 0.9688995480537415, \"value_count\": 13, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 221.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.9579299688339233, \"percentile_inc_nulls\": 0.9579856991767883, \"value_count\": 12, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 552.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.941814661026001, \"percentile_inc_nulls\": 0.9418917298316956, \"value_count\": 11, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 814.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.91152423620224, \"percentile_inc_nulls\": 0.9116414189338684, \"value_count\": 10, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1530.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.8792738318443298, \"percentile_inc_nulls\": 0.8794337511062622, \"value_count\": 9, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1629.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.835085391998291, \"percentile_inc_nulls\": 0.8353039026260376, \"value_count\": 8, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2232.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.7874126434326172, \"percentile_inc_nulls\": 0.7876942753791809, \"value_count\": 7, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2408.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.7281384468078613, \"percentile_inc_nulls\": 0.7284985780715942, \"value_count\": 6, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2994.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.664983868598938, \"percentile_inc_nulls\": 0.6654276847839355, \"value_count\": 5, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 3190.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.587297797203064, \"percentile_inc_nulls\": 0.5878444910049438, \"value_count\": 4, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 3924.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.4821721911430359, \"percentile_inc_nulls\": 0.48285818099975586, \"value_count\": 3, \"group_name\": \"_full_name_\", 
\"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 5310.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.3309575915336609, \"percentile_inc_nulls\": 0.33184391260147095, \"value_count\": 2, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 7638.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.0013247132301330566, \"value_count\": 1, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 16717.0, \"distinct_value_count\": 25573}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 82, \"group_name\": \"_full_name_\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 82.0, \"distinct_value_count\": 25573}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column \\\"full_name\\\"\", \"subtitle\": \"In this col, 67 values (0.1%) are null and there are 25573 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 82, \"group_name\": \"_full_name_\", \"value\": \"william\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 76, \"group_name\": 
\"_full_name_\", \"value\": \"john\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 62, \"group_name\": \"_full_name_\", \"value\": \"sir baronet\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 51, \"group_name\": \"_full_name_\", \"value\": \"thomas\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 48, \"group_name\": \"_full_name_\", \"value\": \"sir 1st baronet\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 5 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"_full_name_\", \"value\": \"thomas clifford of chudleigh\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 1, \"group_name\": \"_full_name_\", \"value\": \"rhomas clifford chudleigh\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 1, \"group_name\": \"_full_name_\", \"value\": \"thomas clifford, 1st baron clifford of chudleigh\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}, {\"value_count\": 1, \"group_name\": \"_full_name_\", \"value\": \"thomas 1st chudleigh\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, 
\"distinct_value_count\": 25573}, {\"value_count\": 1, \"group_name\": \"_full_name_\", \"value\": \"thomas clifford, baron chudleigh\", \"total_non_null_rows\": 50511, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 25573}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 82]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 5 values by value count\"}]}, {\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.9847862124443054, \"percentile_inc_nulls\": 0.98821622133255, \"value_count\": 596, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 596.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9702361226081848, \"percentile_inc_nulls\": 0.9769464731216431, \"value_count\": 570, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 570.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9574728608131409, \"percentile_inc_nulls\": 0.9670608043670654, \"value_count\": 500, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 500.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9451180696487427, \"percentile_inc_nulls\": 0.957491397857666, \"value_count\": 484, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 484.0, \"distinct_value_count\": 
8985}, {\"percentile_ex_nulls\": 0.9329419136047363, \"percentile_inc_nulls\": 0.9480603933334351, \"value_count\": 477, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 477.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9225781559944153, \"percentile_inc_nulls\": 0.9400331974029541, \"value_count\": 406, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 406.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9122399687767029, \"percentile_inc_nulls\": 0.9320257902145386, \"value_count\": 405, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 405.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9027185440063477, \"percentile_inc_nulls\": 0.924651026725769, \"value_count\": 373, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 373.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8937842845916748, \"percentile_inc_nulls\": 0.9177310466766357, \"value_count\": 350, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 350.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8850797414779663, \"percentile_inc_nulls\": 0.910988986492157, \"value_count\": 341, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 341.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.876451849937439, \"percentile_inc_nulls\": 0.9043062329292297, \"value_count\": 338, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 338.0, \"distinct_value_count\": 8985}, 
{\"percentile_ex_nulls\": 0.8680791258811951, \"percentile_inc_nulls\": 0.8978211879730225, \"value_count\": 328, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 328.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8617995977401733, \"percentile_inc_nulls\": 0.8929573893547058, \"value_count\": 246, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 246.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8560306429862976, \"percentile_inc_nulls\": 0.8884890675544739, \"value_count\": 226, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 226.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.850542426109314, \"percentile_inc_nulls\": 0.8842381834983826, \"value_count\": 215, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 215.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8452329635620117, \"percentile_inc_nulls\": 0.8801257610321045, \"value_count\": 208, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 208.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8408934473991394, \"percentile_inc_nulls\": 0.8767645955085754, \"value_count\": 170, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 170.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8368091583251953, \"percentile_inc_nulls\": 0.8736011981964111, \"value_count\": 160, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 160.0, \"distinct_value_count\": 8985}, 
{\"percentile_ex_nulls\": 0.8329547047615051, \"percentile_inc_nulls\": 0.8706156611442566, \"value_count\": 151, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 151.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8294064998626709, \"percentile_inc_nulls\": 0.8678674697875977, \"value_count\": 139, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 139.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8262922763824463, \"percentile_inc_nulls\": 0.8654553294181824, \"value_count\": 122, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 122.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8232546448707581, \"percentile_inc_nulls\": 0.8631025552749634, \"value_count\": 119, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 119.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8202425241470337, \"percentile_inc_nulls\": 0.860769510269165, \"value_count\": 118, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 118.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8173069357872009, \"percentile_inc_nulls\": 0.8584957718849182, \"value_count\": 115, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 115.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8143969178199768, \"percentile_inc_nulls\": 0.8562418222427368, \"value_count\": 114, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 114.0, \"distinct_value_count\": 8985}, 
{\"percentile_ex_nulls\": 0.8088321685791016, \"percentile_inc_nulls\": 0.8519316911697388, \"value_count\": 109, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 218.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8060753345489502, \"percentile_inc_nulls\": 0.8497963547706604, \"value_count\": 108, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 108.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8034460544586182, \"percentile_inc_nulls\": 0.8477599024772644, \"value_count\": 103, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 103.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8009700179100037, \"percentile_inc_nulls\": 0.8458420634269714, \"value_count\": 97, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 97.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7986981272697449, \"percentile_inc_nulls\": 0.8440824151039124, \"value_count\": 89, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 89.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7964773178100586, \"percentile_inc_nulls\": 0.8423622846603394, \"value_count\": 87, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 87.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.794282078742981, \"percentile_inc_nulls\": 0.8406619429588318, \"value_count\": 86, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 86.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 
0.7921888828277588, \"percentile_inc_nulls\": 0.8390406966209412, \"value_count\": 82, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 82.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7884109616279602, \"percentile_inc_nulls\": 0.8361145257949829, \"value_count\": 74, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 148.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7847351431846619, \"percentile_inc_nulls\": 0.8332674503326416, \"value_count\": 72, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7829228043556213, \"percentile_inc_nulls\": 0.8318636417388916, \"value_count\": 71, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 71.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7811869978904724, \"percentile_inc_nulls\": 0.8305191993713379, \"value_count\": 68, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 68.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7778174877166748, \"percentile_inc_nulls\": 0.8279093503952026, \"value_count\": 66, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7762093544006348, \"percentile_inc_nulls\": 0.8266637325286865, \"value_count\": 63, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 63.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7746266722679138, 
\"percentile_inc_nulls\": 0.8254379034042358, \"value_count\": 62, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 62.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7730695605278015, \"percentile_inc_nulls\": 0.8242318630218506, \"value_count\": 61, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 61.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.768474817276001, \"percentile_inc_nulls\": 0.8206729888916016, \"value_count\": 60, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7669942378997803, \"percentile_inc_nulls\": 0.8195262551307678, \"value_count\": 58, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 58.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7641863226890564, \"percentile_inc_nulls\": 0.8173514008522034, \"value_count\": 55, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 110.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7628079056739807, \"percentile_inc_nulls\": 0.8162837624549866, \"value_count\": 54, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 54.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7614805698394775, \"percentile_inc_nulls\": 0.8152556419372559, \"value_count\": 52, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 52.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7576515674591064, \"percentile_inc_nulls\": 
0.8122899532318115, \"value_count\": 50, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 150.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.22545373439788818, \"value_count\": 1, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 4908.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7564007639884949, \"percentile_inc_nulls\": 0.8113211393356323, \"value_count\": 49, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 49.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7551754713058472, \"percentile_inc_nulls\": 0.8103721141815186, \"value_count\": 48, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 48.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7539757490158081, \"percentile_inc_nulls\": 0.8094428777694702, \"value_count\": 47, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 47.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7504531145095825, \"percentile_inc_nulls\": 0.8067144155502319, \"value_count\": 46, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7482067346572876, \"percentile_inc_nulls\": 0.8049744963645935, \"value_count\": 44, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 88.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7449138164520264, \"percentile_inc_nulls\": 0.8024239540100098, \"value_count\": 43, 
\"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 129.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7438417673110962, \"percentile_inc_nulls\": 0.801593542098999, \"value_count\": 42, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 42.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.741748571395874, \"percentile_inc_nulls\": 0.7999722957611084, \"value_count\": 41, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 82.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7397064566612244, \"percentile_inc_nulls\": 0.7983906269073486, \"value_count\": 40, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 80.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7377153635025024, \"percentile_inc_nulls\": 0.7968484163284302, \"value_count\": 39, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 78.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7367453575134277, \"percentile_inc_nulls\": 0.7960970997810364, \"value_count\": 38, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 38.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7358008623123169, \"percentile_inc_nulls\": 0.795365571975708, \"value_count\": 37, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 37.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7321250438690186, \"percentile_inc_nulls\": 0.7925184965133667, \"value_count\": 36, \"group_name\": \"_dob_\", 
\"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7294448018074036, \"percentile_inc_nulls\": 0.7904424667358398, \"value_count\": 35, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 105.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7242373824119568, \"percentile_inc_nulls\": 0.7864091396331787, \"value_count\": 34, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 204.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7225526571273804, \"percentile_inc_nulls\": 0.7851042151451111, \"value_count\": 33, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 66.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7217358350753784, \"percentile_inc_nulls\": 0.7844715118408203, \"value_count\": 32, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 32.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7185705304145813, \"percentile_inc_nulls\": 0.782019853591919, \"value_count\": 31, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 124.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7155073285102844, \"percentile_inc_nulls\": 0.7796472907066345, \"value_count\": 30, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 120.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7118059992790222, \"percentile_inc_nulls\": 0.7767804265022278, \"value_count\": 29, \"group_name\": \"_dob_\", \"total_non_null_rows\": 
39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 145.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7096617817878723, \"percentile_inc_nulls\": 0.7751196026802063, \"value_count\": 28, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 84.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7075941562652588, \"percentile_inc_nulls\": 0.7735181450843811, \"value_count\": 27, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 81.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7056030631065369, \"percentile_inc_nulls\": 0.7719759941101074, \"value_count\": 26, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 78.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7030503749847412, \"percentile_inc_nulls\": 0.7699987888336182, \"value_count\": 25, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 100.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6993745565414429, \"percentile_inc_nulls\": 0.7671517133712769, \"value_count\": 24, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6964390277862549, \"percentile_inc_nulls\": 0.7648780345916748, \"value_count\": 23, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 115.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6913847923278809, \"percentile_inc_nulls\": 0.7609632611274719, \"value_count\": 22, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 198.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6806637048721313, \"percentile_inc_nulls\": 0.752659261226654, \"value_count\": 21, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 420.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.676068902015686, \"percentile_inc_nulls\": 0.749100387096405, \"value_count\": 20, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6649138927459717, \"percentile_inc_nulls\": 0.7404602766036987, \"value_count\": 19, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 437.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6571027040481567, \"percentile_inc_nulls\": 0.7344102263450623, \"value_count\": 18, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 306.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6462540030479431, \"percentile_inc_nulls\": 0.726007342338562, \"value_count\": 17, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 425.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6368602514266968, \"percentile_inc_nulls\": 0.7187314629554749, \"value_count\": 16, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 368.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6226930618286133, \"percentile_inc_nulls\": 0.7077583074569702, \"value_count\": 15, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 
50578, \"sum_tokens_in_value_count_group\": 555.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6012507677078247, \"percentile_inc_nulls\": 0.6911503076553345, \"value_count\": 14, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 840.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.5826675295829773, \"percentile_inc_nulls\": 0.6767566800117493, \"value_count\": 13, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 728.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.549891471862793, \"percentile_inc_nulls\": 0.6513701677322388, \"value_count\": 12, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1284.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.5184428691864014, \"percentile_inc_nulls\": 0.627011775970459, \"value_count\": 11, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1232.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.4778558015823364, \"percentile_inc_nulls\": 0.5955751538276672, \"value_count\": 10, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1590.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.4351244568824768, \"percentile_inc_nulls\": 0.5624777674674988, \"value_count\": 9, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1674.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.3899936079978943, \"percentile_inc_nulls\": 0.5275218486785889, \"value_count\": 8, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 1768.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.34603703022003174, \"percentile_inc_nulls\": 0.49347543716430664, \"value_count\": 7, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1722.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.30437779426574707, \"percentile_inc_nulls\": 0.46120840311050415, \"value_count\": 6, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1632.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.26340776681900024, \"percentile_inc_nulls\": 0.42947524785995483, \"value_count\": 5, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1605.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.22430121898651123, \"percentile_inc_nulls\": 0.3991854190826416, \"value_count\": 4, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1532.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.18179959058761597, \"percentile_inc_nulls\": 0.3662659525871277, \"value_count\": 3, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1665.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.12528395652770996, \"percentile_inc_nulls\": 0.32249200344085693, \"value_count\": 2, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2214.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 596, \"group_name\": \"_dob_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 596.0, 
\"distinct_value_count\": 8985}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column \\\"dob\\\"\", \"subtitle\": \"In this col, 11,403 values (22.5%) are null and there are 8985 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 596, \"group_name\": \"_dob_\", \"value\": \"1862-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 570, \"group_name\": \"_dob_\", \"value\": \"1860-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 500, \"group_name\": \"_dob_\", \"value\": \"1861-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 484, \"group_name\": \"_dob_\", \"value\": \"1850-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 477, \"group_name\": \"_dob_\", \"value\": \"1858-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": 
{\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 5 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"_dob_\", \"value\": \"1777-89-09\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"_dob_\", \"value\": \"1834-70-14\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"_dob_\", \"value\": \"1865-08-12\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"_dob_\", \"value\": \"1721-07-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"_dob_\", \"value\": \"1845-71-19\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 596]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 5 values by value count\"}]}, {\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.6836498975753784, \"percentile_inc_nulls\": 0.7269761562347412, \"value_count\": 119, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 238.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6809465885162354, \"percentile_inc_nulls\": 0.7246431112289429, \"value_count\": 118, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 118.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6782662868499756, \"percentile_inc_nulls\": 0.72232985496521, \"value_count\": 117, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 117.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6756088137626648, \"percentile_inc_nulls\": 0.7200363874435425, \"value_count\": 116, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 116.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6678426265716553, \"percentile_inc_nulls\": 0.7133338451385498, \"value_count\": 113, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 339.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6601452827453613, \"percentile_inc_nulls\": 0.7066906690597534, \"value_count\": 112, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 336.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6551511287689209, \"percentile_inc_nulls\": 0.7023804783821106, \"value_count\": 109, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 218.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.650202751159668, \"percentile_inc_nulls\": 0.6981098651885986, \"value_count\": 108, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 
43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 216.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6453460454940796, \"percentile_inc_nulls\": 0.6939183473587036, \"value_count\": 106, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 212.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6406267881393433, \"percentile_inc_nulls\": 0.6898453831672668, \"value_count\": 103, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 206.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6359533667564392, \"percentile_inc_nulls\": 0.6858119964599609, \"value_count\": 102, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 204.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6336395740509033, \"percentile_inc_nulls\": 0.6838151216506958, \"value_count\": 101, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 101.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.629057765007019, \"percentile_inc_nulls\": 0.6798608303070068, \"value_count\": 100, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6245217323303223, \"percentile_inc_nulls\": 0.675946056842804, \"value_count\": 99, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 198.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.613296389579773, \"percentile_inc_nulls\": 0.6662580966949463, \"value_count\": 98, \"group_name\": 
\"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 490.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6066298484802246, \"percentile_inc_nulls\": 0.6605045795440674, \"value_count\": 97, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 291.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6044305562973022, \"percentile_inc_nulls\": 0.6586065292358398, \"value_count\": 96, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 96.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6022771596908569, \"percentile_inc_nulls\": 0.6567479968070984, \"value_count\": 94, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 94.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.598016083240509, \"percentile_inc_nulls\": 0.6530705094337463, \"value_count\": 93, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 186.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5938008427619934, \"percentile_inc_nulls\": 0.6494325399398804, \"value_count\": 92, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 184.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5917619466781616, \"percentile_inc_nulls\": 0.6476728916168213, \"value_count\": 89, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 89.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5857139825820923, \"percentile_inc_nulls\": 0.6424532532691956, 
\"value_count\": 88, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 264.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.577924907207489, \"percentile_inc_nulls\": 0.6357309818267822, \"value_count\": 85, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 340.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5741220116615295, \"percentile_inc_nulls\": 0.6324489116668701, \"value_count\": 83, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 166.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.562850832939148, \"percentile_inc_nulls\": 0.6227213144302368, \"value_count\": 82, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 492.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.557283878326416, \"percentile_inc_nulls\": 0.6179168820381165, \"value_count\": 81, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 243.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5464250445365906, \"percentile_inc_nulls\": 0.608545184135437, \"value_count\": 79, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 474.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5392774343490601, \"percentile_inc_nulls\": 0.6023765206336975, \"value_count\": 78, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 312.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5304574966430664, 
\"percentile_inc_nulls\": 0.5947645306587219, \"value_count\": 77, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 385.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5234931707382202, \"percentile_inc_nulls\": 0.5887539982795715, \"value_count\": 76, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 304.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5150167942047119, \"percentile_inc_nulls\": 0.5814385414123535, \"value_count\": 74, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 370.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5083274245262146, \"percentile_inc_nulls\": 0.575665295124054, \"value_count\": 73, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 292.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5000801682472229, \"percentile_inc_nulls\": 0.5685476064682007, \"value_count\": 72, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 360.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4919474720954895, \"percentile_inc_nulls\": 0.561528742313385, \"value_count\": 71, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 355.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4871366024017334, \"percentile_inc_nulls\": 0.5573767423629761, \"value_count\": 70, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 210.0, \"distinct_value_count\": 2373}, 
{\"percentile_ex_nulls\": 0.4792330265045166, \"percentile_inc_nulls\": 0.5505555868148804, \"value_count\": 69, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 345.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4761173725128174, \"percentile_inc_nulls\": 0.547866702079773, \"value_count\": 68, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 136.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.47151267528533936, \"percentile_inc_nulls\": 0.5438926219940186, \"value_count\": 67, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 201.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4684886932373047, \"percentile_inc_nulls\": 0.5412827730178833, \"value_count\": 66, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4625323414802551, \"percentile_inc_nulls\": 0.5361422300338745, \"value_count\": 65, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 260.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4566676616668701, \"percentile_inc_nulls\": 0.5310807228088379, \"value_count\": 64, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 256.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4537811279296875, \"percentile_inc_nulls\": 0.5285894870758057, \"value_count\": 63, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 126.0, 
\"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.44667935371398926, \"percentile_inc_nulls\": 0.5224603414535522, \"value_count\": 62, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 310.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4438844323158264, \"percentile_inc_nulls\": 0.5200482606887817, \"value_count\": 61, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 122.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.43976080417633057, \"percentile_inc_nulls\": 0.5164893865585327, \"value_count\": 60, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.434354305267334, \"percentile_inc_nulls\": 0.511823296546936, \"value_count\": 59, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 236.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.43169689178466797, \"percentile_inc_nulls\": 0.5095298290252686, \"value_count\": 58, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 116.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4303910732269287, \"percentile_inc_nulls\": 0.5084028244018555, \"value_count\": 57, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 57.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4226936101913452, \"percentile_inc_nulls\": 0.5017596483230591, \"value_count\": 56, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 336.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4176536798477173, \"percentile_inc_nulls\": 0.4974099397659302, \"value_count\": 55, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 220.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4151794910430908, \"percentile_inc_nulls\": 0.4952746033668518, \"value_count\": 54, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 108.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4115369915962219, \"percentile_inc_nulls\": 0.49213099479675293, \"value_count\": 53, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 159.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4067718982696533, \"percentile_inc_nulls\": 0.4880185127258301, \"value_count\": 52, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 208.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.40093010663986206, \"percentile_inc_nulls\": 0.4829767942428589, \"value_count\": 51, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 255.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3952028751373291, \"percentile_inc_nulls\": 0.4780339002609253, \"value_count\": 50, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 250.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3873450756072998, \"percentile_inc_nulls\": 0.47125232219696045, \"value_count\": 49, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 343.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.38294655084609985, \"percentile_inc_nulls\": 0.46745622158050537, \"value_count\": 48, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 192.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3797163963317871, \"percentile_inc_nulls\": 0.4646684527397156, \"value_count\": 47, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 141.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3776087760925293, \"percentile_inc_nulls\": 0.4628494381904602, \"value_count\": 46, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 92.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.37348514795303345, \"percentile_inc_nulls\": 0.45929062366485596, \"value_count\": 45, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3664291501045227, \"percentile_inc_nulls\": 0.4532009959220886, \"value_count\": 44, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 308.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.35854846239089966, \"percentile_inc_nulls\": 0.44639962911605835, \"value_count\": 43, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 344.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3575863242149353, \"percentile_inc_nulls\": 0.4455692172050476, \"value_count\": 42, \"group_name\": 
\"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 42.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3528899550437927, \"percentile_inc_nulls\": 0.44151610136032104, \"value_count\": 41, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 205.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.34830814599990845, \"percentile_inc_nulls\": 0.4375618100166321, \"value_count\": 40, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.34294748306274414, \"percentile_inc_nulls\": 0.43293529748916626, \"value_count\": 39, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 234.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.33337152004241943, \"percentile_inc_nulls\": 0.4246708154678345, \"value_count\": 38, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 418.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3274381160736084, \"percentile_inc_nulls\": 0.41955000162124634, \"value_count\": 37, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 259.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.32413917779922485, \"percentile_inc_nulls\": 0.41670292615890503, \"value_count\": 36, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3233373761177063, \"percentile_inc_nulls\": 
0.41601091623306274, \"value_count\": 35, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 35.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.32022178173065186, \"percentile_inc_nulls\": 0.41332197189331055, \"value_count\": 34, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 136.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3171977996826172, \"percentile_inc_nulls\": 0.41071218252182007, \"value_count\": 33, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3149985074996948, \"percentile_inc_nulls\": 0.40881413221359253, \"value_count\": 32, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 96.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.306476354598999, \"percentile_inc_nulls\": 0.4014591574668884, \"value_count\": 31, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 372.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3030400276184082, \"percentile_inc_nulls\": 0.3984934091567993, \"value_count\": 30, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 150.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2957320809364319, \"percentile_inc_nulls\": 0.39218634366989136, \"value_count\": 29, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 319.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 
0.2880346179008484, \"percentile_inc_nulls\": 0.3855431079864502, \"value_count\": 28, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 336.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.28370481729507446, \"percentile_inc_nulls\": 0.38180631399154663, \"value_count\": 27, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 189.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.27834415435791016, \"percentile_inc_nulls\": 0.3771798014640808, \"value_count\": 26, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 234.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2737623453140259, \"percentile_inc_nulls\": 0.37322551012039185, \"value_count\": 25, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.26826417446136475, \"percentile_inc_nulls\": 0.368480384349823, \"value_count\": 24, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 240.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.265102744102478, \"percentile_inc_nulls\": 0.3657519221305847, \"value_count\": 23, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2565348148345947, \"percentile_inc_nulls\": 0.35835736989974976, \"value_count\": 22, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 374.0, 
\"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.25172388553619385, \"percentile_inc_nulls\": 0.3542053699493408, \"value_count\": 21, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 210.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.24530941247940063, \"percentile_inc_nulls\": 0.3486694097518921, \"value_count\": 20, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 280.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.24095666408538818, \"percentile_inc_nulls\": 0.3449128270149231, \"value_count\": 19, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 190.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.23229706287384033, \"percentile_inc_nulls\": 0.3374391794204712, \"value_count\": 18, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 378.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.22372913360595703, \"percentile_inc_nulls\": 0.330044686794281, \"value_count\": 17, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 374.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.21346592903137207, \"percentile_inc_nulls\": 0.3211870789527893, \"value_count\": 16, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 448.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.20418775081634521, \"percentile_inc_nulls\": 0.3131796717643738, \"value_count\": 15, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 405.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.18590641021728516, \"percentile_inc_nulls\": 0.297402024269104, \"value_count\": 14, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 798.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.17339807748794556, \"percentile_inc_nulls\": 0.2866068482398987, \"value_count\": 13, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 546.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.15635383129119873, \"percentile_inc_nulls\": 0.2718968987464905, \"value_count\": 12, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 744.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.14274585247039795, \"percentile_inc_nulls\": 0.2601526379585266, \"value_count\": 11, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 594.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.12739688158035278, \"percentile_inc_nulls\": 0.24690574407577515, \"value_count\": 10, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 670.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.114407479763031, \"percentile_inc_nulls\": 0.23569536209106445, \"value_count\": 9, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 567.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.09846282005310059, \"percentile_inc_nulls\": 0.22193443775177002, \"value_count\": 8, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 
43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 696.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.08531302213668823, \"percentile_inc_nulls\": 0.21058565378189087, \"value_count\": 7, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 574.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.07129275798797607, \"percentile_inc_nulls\": 0.19848549365997314, \"value_count\": 6, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 612.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.05594372749328613, \"percentile_inc_nulls\": 0.18523865938186646, \"value_count\": 5, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 670.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.03862452507019043, \"percentile_inc_nulls\": 0.17029142379760742, \"value_count\": 4, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 756.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.021717727184295654, \"percentile_inc_nulls\": 0.15570008754730225, \"value_count\": 3, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 738.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.008063971996307373, \"percentile_inc_nulls\": 0.1439163088798523, \"value_count\": 2, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 596.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.13695675134658813, \"value_count\": 1, \"group_name\": \"_birth_place_\", 
\"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 352.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9468053579330444, \"percentile_inc_nulls\": 0.9540907144546509, \"value_count\": 2322, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2322.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9294174313545227, \"percentile_inc_nulls\": 0.9390841722488403, \"value_count\": 759, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 759.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9164051413536072, \"percentile_inc_nulls\": 0.9278540015220642, \"value_count\": 568, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 568.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9061418771743774, \"percentile_inc_nulls\": 0.9189963936805725, \"value_count\": 448, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 448.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8960619568824768, \"percentile_inc_nulls\": 0.9102969765663147, \"value_count\": 440, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 440.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8866005539894104, \"percentile_inc_nulls\": 0.9021313786506653, \"value_count\": 413, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 413.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8786511421203613, \"percentile_inc_nulls\": 0.8952706456184387, \"value_count\": 
347, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 347.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8713431358337402, \"percentile_inc_nulls\": 0.8889635801315308, \"value_count\": 319, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 319.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8641039133071899, \"percentile_inc_nulls\": 0.8827158212661743, \"value_count\": 316, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 316.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8575061559677124, \"percentile_inc_nulls\": 0.8770216107368469, \"value_count\": 288, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 288.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8509541749954224, \"percentile_inc_nulls\": 0.8713669776916504, \"value_count\": 286, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 286.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8450894355773926, \"percentile_inc_nulls\": 0.8663055300712585, \"value_count\": 256, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 256.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8394080400466919, \"percentile_inc_nulls\": 0.861402153968811, \"value_count\": 248, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 248.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8340702056884766, \"percentile_inc_nulls\": 
0.8567954301834106, \"value_count\": 233, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 233.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8289615511894226, \"percentile_inc_nulls\": 0.8523864150047302, \"value_count\": 223, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 223.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8242193460464478, \"percentile_inc_nulls\": 0.8482937216758728, \"value_count\": 207, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 207.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8195459246635437, \"percentile_inc_nulls\": 0.8442603349685669, \"value_count\": 204, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 204.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8149412274360657, \"percentile_inc_nulls\": 0.8402862548828125, \"value_count\": 201, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 201.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8103594779968262, \"percentile_inc_nulls\": 0.8363320231437683, \"value_count\": 200, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8059608936309814, \"percentile_inc_nulls\": 0.8325358629226685, \"value_count\": 192, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 192.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 
0.8016998767852783, \"percentile_inc_nulls\": 0.8288583755493164, \"value_count\": 186, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 186.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7975762486457825, \"percentile_inc_nulls\": 0.8252995014190674, \"value_count\": 180, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7894206047058105, \"percentile_inc_nulls\": 0.818260908126831, \"value_count\": 178, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 356.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7853657603263855, \"percentile_inc_nulls\": 0.8147613406181335, \"value_count\": 177, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 177.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7814711928367615, \"percentile_inc_nulls\": 0.8114002346992493, \"value_count\": 170, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 170.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.777599573135376, \"percentile_inc_nulls\": 0.8080588579177856, \"value_count\": 169, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 169.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7738654613494873, \"percentile_inc_nulls\": 0.804836094379425, \"value_count\": 163, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 163.0, 
\"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7701541781425476, \"percentile_inc_nulls\": 0.8016331195831299, \"value_count\": 162, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 162.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7665345668792725, \"percentile_inc_nulls\": 0.7985092401504517, \"value_count\": 158, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 158.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7629378437995911, \"percentile_inc_nulls\": 0.7954051494598389, \"value_count\": 157, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 157.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7593869566917419, \"percentile_inc_nulls\": 0.7923405170440674, \"value_count\": 155, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 155.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7489404678344727, \"percentile_inc_nulls\": 0.7833247780799866, \"value_count\": 152, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 456.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7455728054046631, \"percentile_inc_nulls\": 0.7804183959960938, \"value_count\": 147, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 147.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7422968149185181, \"percentile_inc_nulls\": 0.7775910496711731, \"value_count\": 143, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 143.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7390437722206116, \"percentile_inc_nulls\": 0.7747834920883179, \"value_count\": 142, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 142.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7358136177062988, \"percentile_inc_nulls\": 0.7719957232475281, \"value_count\": 141, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 141.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7326292991638184, \"percentile_inc_nulls\": 0.7692475318908691, \"value_count\": 139, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 139.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7294678688049316, \"percentile_inc_nulls\": 0.7665190696716309, \"value_count\": 138, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7263522148132324, \"percentile_inc_nulls\": 0.7638301253318787, \"value_count\": 136, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 136.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7233282327651978, \"percentile_inc_nulls\": 0.7612202763557434, \"value_count\": 132, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.717463493347168, \"percentile_inc_nulls\": 0.7561588287353516, \"value_count\": 128, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 
43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 256.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7116446495056152, \"percentile_inc_nulls\": 0.751136839389801, \"value_count\": 127, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 254.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7087581157684326, \"percentile_inc_nulls\": 0.7486456632614136, \"value_count\": 126, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 126.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7001672387123108, \"percentile_inc_nulls\": 0.7412313222885132, \"value_count\": 125, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 375.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.697372317314148, \"percentile_inc_nulls\": 0.7388192415237427, \"value_count\": 122, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 122.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6946003437042236, \"percentile_inc_nulls\": 0.7364269495010376, \"value_count\": 121, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 121.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6891021728515625, \"percentile_inc_nulls\": 0.731681764125824, \"value_count\": 120, \"group_name\": \"_birth_place_\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 240.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 119, \"group_name\": \"_birth_place_\", 
\"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 238.0, \"distinct_value_count\": 2373}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column \\\"birth_place\\\"\", \"subtitle\": \"In this col, 6,927 values (13.7%) are null and there are 2373 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 2322, \"group_name\": \"_birth_place_\", \"value\": \"london\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 759, \"group_name\": \"_birth_place_\", \"value\": \"westminster\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 568, \"group_name\": \"_birth_place_\", \"value\": \"birmingham\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 448, \"group_name\": \"_birth_place_\", \"value\": \"manchester\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 440, \"group_name\": \"_birth_place_\", \"value\": \"liverpool\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": 
\"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 5 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"_birth_place_\", \"value\": \"watermead\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"_birth_place_\", \"value\": \"clutton\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"_birth_place_\", \"value\": \"counthorpe and creeton\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"_birth_place_\", \"value\": \"rhos-on-sea\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"_birth_place_\", \"value\": \"wem rural\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 2322]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 5 values by value count\"}]}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\"}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.VConcatChart(...)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from splink import splink_datasets, DuckDBAPI\n",
    "from splink.exploratory import profile_columns\n",
    "\n",
    "df = splink_datasets.historical_50k\n",
    "df = df[[\"unique_id\", \"full_name\", \"dob\", \"birth_place\"]]\n",
    "chart = profile_columns(df, db_api=DuckDBAPI(), top_n=5, bottom_n=5)\n",
    "chart\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "!!! info \"At a glance\"\n",
    "    **Useful for:** Looking at the distribution of values in columns.\n",
    "\n",
    "    **API Documentation:** [profile_columns()](../api_docs/exploratory.md#splink.exploratory.profile_columns)\n",
    "\n",
    "    **What is needed to generate the chart?:** Some input data (e.g. a dataframe) and a database API object such as `DuckDBAPI()`, as in the example above. A `linker` is not required."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### What the chart shows\n",
    "\n",
    "The `profile_columns` chart shows 3 charts for each selected column:\n",
    "\n",
    "- The left chart shows the distribution of all values in the column.  It is a summary of the skew of value frequencies. The width of each \"step\" represents the proportion of all (non-null) values that occur with a given count, while the height of each \"step\" gives that count.  \n",
    "- The middle chart shows the counts of the most common values in the column (ten by default; five in the example above, as set by `top_n`). These correspond to the leftmost \"steps\" in the left chart.\n",
    "- The right chart shows the counts of the least common values in the column (ten by default; five in the example above, as set by `bottom_n`). These correspond to the rightmost \"steps\" in the left chart."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "??? note \"What the chart tooltip shows\"\n",
    "\n",
    "    ##### Left chart:\n",
    "\n",
    "    ![](./img/profile_columns_tooltip_1.png) \n",
    "\n",
    "    This tooltip shows a number of statistics based on the column value of the \"step\" that the user is hovering over, including:\n",
    "\n",
    "    - The number of occurrences of the given value.\n",
    "    - The percentile of the column value (excluding and including null values).\n",
    "    - The total number of rows in the column (excluding and including null values).\n",
    "\n",
    "    ##### Middle and right chart:\n",
    "\n",
    "    ![](./img/profile_columns_tooltip_2.png)\n",
    "\n",
    "    This tooltip shows a number of statistics based on the column value of the bar that the user is hovering over, including:\n",
    "\n",
    "    - The column value.\n",
    "    - The count of the column value.\n",
    "    - The total number of rows in the column (excluding and including null values)."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to interpret the chart\n",
    "\n",
    "The distribution of values in your data is important for two main reasons:\n",
    "\n",
    "1. Columns with higher cardinality (number of distinct values) are usually more useful for data linking. For instance, date of birth is a much stronger linkage variable than gender.\n",
    "\n",
    "2. The skew of values is important. If you have a `birth_place` column that has 1,000 distinct values, but 75% of the records are London, this is much less useful for linkage than if the 1,000 values were equally distributed."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Actions to take as a result of the chart\n",
    "\n",
    "In an ideal world, all of the columns in datasets used for linkage would be high cardinality with a low skew (i.e. many distinct values that are evenly distributed). This is rarely the case with real-life datasets, but there are a number of steps you can take to extract the most predictive value, particularly with skewed data.\n",
    "\n",
    "#### Skewed String Columns\n",
    "\n",
    "Consider the skew of `birth_place` in our example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-3d125f6c5f094bb2baf7594858e4b199.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-3d125f6c5f094bb2baf7594858e4b199.vega-embed details,\n",
       "  #altair-viz-3d125f6c5f094bb2baf7594858e4b199.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-3d125f6c5f094bb2baf7594858e4b199\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-3d125f6c5f094bb2baf7594858e4b199\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-3d125f6c5f094bb2baf7594858e4b199\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"vconcat\": [{\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.9468053579330444, \"percentile_inc_nulls\": 0.9540907144546509, \"value_count\": 2322, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2322.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9294174313545227, \"percentile_inc_nulls\": 0.9390841722488403, \"value_count\": 759, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 759.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9164051413536072, \"percentile_inc_nulls\": 0.9278540015220642, \"value_count\": 568, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 568.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.9061418771743774, \"percentile_inc_nulls\": 0.9189963936805725, \"value_count\": 448, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 448.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8960619568824768, \"percentile_inc_nulls\": 0.9102969765663147, \"value_count\": 440, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 440.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8866005539894104, \"percentile_inc_nulls\": 0.9021313786506653, \"value_count\": 413, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 413.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8786511421203613, 
\"percentile_inc_nulls\": 0.8952706456184387, \"value_count\": 347, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 347.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8713431358337402, \"percentile_inc_nulls\": 0.8889635801315308, \"value_count\": 319, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 319.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8641039133071899, \"percentile_inc_nulls\": 0.8827158212661743, \"value_count\": 316, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 316.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8575061559677124, \"percentile_inc_nulls\": 0.8770216107368469, \"value_count\": 288, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 288.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8509541749954224, \"percentile_inc_nulls\": 0.8713669776916504, \"value_count\": 286, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 286.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8450894355773926, \"percentile_inc_nulls\": 0.8663055300712585, \"value_count\": 256, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 256.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8394080400466919, \"percentile_inc_nulls\": 0.861402153968811, \"value_count\": 248, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 248.0, \"distinct_value_count\": 2373}, 
{\"percentile_ex_nulls\": 0.8340702056884766, \"percentile_inc_nulls\": 0.8567954301834106, \"value_count\": 233, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 233.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8289615511894226, \"percentile_inc_nulls\": 0.8523864150047302, \"value_count\": 223, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 223.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8242193460464478, \"percentile_inc_nulls\": 0.8482937216758728, \"value_count\": 207, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 207.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8195459246635437, \"percentile_inc_nulls\": 0.8442603349685669, \"value_count\": 204, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 204.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8149412274360657, \"percentile_inc_nulls\": 0.8402862548828125, \"value_count\": 201, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 201.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8103594779968262, \"percentile_inc_nulls\": 0.8363320231437683, \"value_count\": 200, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8059608936309814, \"percentile_inc_nulls\": 0.8325358629226685, \"value_count\": 192, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 192.0, 
\"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.8016998767852783, \"percentile_inc_nulls\": 0.8288583755493164, \"value_count\": 186, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 186.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7975762486457825, \"percentile_inc_nulls\": 0.8252995014190674, \"value_count\": 180, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7894206047058105, \"percentile_inc_nulls\": 0.818260908126831, \"value_count\": 178, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 356.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7853657603263855, \"percentile_inc_nulls\": 0.8147613406181335, \"value_count\": 177, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 177.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7814711928367615, \"percentile_inc_nulls\": 0.8114002346992493, \"value_count\": 170, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 170.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.777599573135376, \"percentile_inc_nulls\": 0.8080588579177856, \"value_count\": 169, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 169.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7738654613494873, \"percentile_inc_nulls\": 0.804836094379425, \"value_count\": 163, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 163.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7701541781425476, \"percentile_inc_nulls\": 0.8016331195831299, \"value_count\": 162, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 162.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7665345668792725, \"percentile_inc_nulls\": 0.7985092401504517, \"value_count\": 158, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 158.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7629378437995911, \"percentile_inc_nulls\": 0.7954051494598389, \"value_count\": 157, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 157.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7593869566917419, \"percentile_inc_nulls\": 0.7923405170440674, \"value_count\": 155, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 155.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7489404678344727, \"percentile_inc_nulls\": 0.7833247780799866, \"value_count\": 152, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 456.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7455728054046631, \"percentile_inc_nulls\": 0.7804183959960938, \"value_count\": 147, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 147.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7422968149185181, \"percentile_inc_nulls\": 0.7775910496711731, \"value_count\": 143, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 143.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7390437722206116, \"percentile_inc_nulls\": 0.7747834920883179, \"value_count\": 142, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 142.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7358136177062988, \"percentile_inc_nulls\": 0.7719957232475281, \"value_count\": 141, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 141.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7326292991638184, \"percentile_inc_nulls\": 0.7692475318908691, \"value_count\": 139, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 139.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7294678688049316, \"percentile_inc_nulls\": 0.7665190696716309, \"value_count\": 138, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7263522148132324, \"percentile_inc_nulls\": 0.7638301253318787, \"value_count\": 136, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 136.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7233282327651978, \"percentile_inc_nulls\": 0.7612202763557434, \"value_count\": 132, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.717463493347168, \"percentile_inc_nulls\": 0.7561588287353516, \"value_count\": 128, \"group_name\": \"birth_place\", 
\"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 256.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7116446495056152, \"percentile_inc_nulls\": 0.751136839389801, \"value_count\": 127, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 254.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7087581157684326, \"percentile_inc_nulls\": 0.7486456632614136, \"value_count\": 126, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 126.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.7001672387123108, \"percentile_inc_nulls\": 0.7412313222885132, \"value_count\": 125, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 375.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.697372317314148, \"percentile_inc_nulls\": 0.7388192415237427, \"value_count\": 122, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 122.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6946003437042236, \"percentile_inc_nulls\": 0.7364269495010376, \"value_count\": 121, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 121.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6891021728515625, \"percentile_inc_nulls\": 0.731681764125824, \"value_count\": 120, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 240.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4303910732269287, \"percentile_inc_nulls\": 0.5084028244018555, \"value_count\": 57, 
\"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 57.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4226936101913452, \"percentile_inc_nulls\": 0.5017596483230591, \"value_count\": 56, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 336.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4176536798477173, \"percentile_inc_nulls\": 0.4974099397659302, \"value_count\": 55, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 220.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4151794910430908, \"percentile_inc_nulls\": 0.4952746033668518, \"value_count\": 54, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 108.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4115369915962219, \"percentile_inc_nulls\": 0.49213099479675293, \"value_count\": 53, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 159.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4067718982696533, \"percentile_inc_nulls\": 0.4880185127258301, \"value_count\": 52, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 208.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.40093010663986206, \"percentile_inc_nulls\": 0.4829767942428589, \"value_count\": 51, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 255.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3952028751373291, \"percentile_inc_nulls\": 0.4780339002609253, 
\"value_count\": 50, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 250.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3873450756072998, \"percentile_inc_nulls\": 0.47125232219696045, \"value_count\": 49, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 343.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.38294655084609985, \"percentile_inc_nulls\": 0.46745622158050537, \"value_count\": 48, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 192.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3797163963317871, \"percentile_inc_nulls\": 0.4646684527397156, \"value_count\": 47, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 141.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3776087760925293, \"percentile_inc_nulls\": 0.4628494381904602, \"value_count\": 46, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 92.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.37348514795303345, \"percentile_inc_nulls\": 0.45929062366485596, \"value_count\": 45, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3664291501045227, \"percentile_inc_nulls\": 0.4532009959220886, \"value_count\": 44, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 308.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.35854846239089966, 
\"percentile_inc_nulls\": 0.44639962911605835, \"value_count\": 43, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 344.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3575863242149353, \"percentile_inc_nulls\": 0.4455692172050476, \"value_count\": 42, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 42.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3528899550437927, \"percentile_inc_nulls\": 0.44151610136032104, \"value_count\": 41, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 205.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.34830814599990845, \"percentile_inc_nulls\": 0.4375618100166321, \"value_count\": 40, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.34294748306274414, \"percentile_inc_nulls\": 0.43293529748916626, \"value_count\": 39, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 234.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.33337152004241943, \"percentile_inc_nulls\": 0.4246708154678345, \"value_count\": 38, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 418.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3274381160736084, \"percentile_inc_nulls\": 0.41955000162124634, \"value_count\": 37, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 259.0, \"distinct_value_count\": 2373}, 
{\"percentile_ex_nulls\": 0.32413917779922485, \"percentile_inc_nulls\": 0.41670292615890503, \"value_count\": 36, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3233373761177063, \"percentile_inc_nulls\": 0.41601091623306274, \"value_count\": 35, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 35.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.32022178173065186, \"percentile_inc_nulls\": 0.41332197189331055, \"value_count\": 34, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 136.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3171977996826172, \"percentile_inc_nulls\": 0.41071218252182007, \"value_count\": 33, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3149985074996948, \"percentile_inc_nulls\": 0.40881413221359253, \"value_count\": 32, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 96.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.306476354598999, \"percentile_inc_nulls\": 0.4014591574668884, \"value_count\": 31, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 372.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.3030400276184082, \"percentile_inc_nulls\": 0.3984934091567993, \"value_count\": 30, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 150.0, 
\"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2957320809364319, \"percentile_inc_nulls\": 0.39218634366989136, \"value_count\": 29, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 319.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2880346179008484, \"percentile_inc_nulls\": 0.3855431079864502, \"value_count\": 28, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 336.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.28370481729507446, \"percentile_inc_nulls\": 0.38180631399154663, \"value_count\": 27, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 189.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.27834415435791016, \"percentile_inc_nulls\": 0.3771798014640808, \"value_count\": 26, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 234.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2737623453140259, \"percentile_inc_nulls\": 0.37322551012039185, \"value_count\": 25, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.26826417446136475, \"percentile_inc_nulls\": 0.368480384349823, \"value_count\": 24, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 240.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.265102744102478, \"percentile_inc_nulls\": 0.3657519221305847, \"value_count\": 23, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.2565348148345947, \"percentile_inc_nulls\": 0.35835736989974976, \"value_count\": 22, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 374.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.25172388553619385, \"percentile_inc_nulls\": 0.3542053699493408, \"value_count\": 21, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 210.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.24530941247940063, \"percentile_inc_nulls\": 0.3486694097518921, \"value_count\": 20, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 280.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.24095666408538818, \"percentile_inc_nulls\": 0.3449128270149231, \"value_count\": 19, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 190.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.23229706287384033, \"percentile_inc_nulls\": 0.3374391794204712, \"value_count\": 18, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 378.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.22372913360595703, \"percentile_inc_nulls\": 0.330044686794281, \"value_count\": 17, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 374.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.21346592903137207, \"percentile_inc_nulls\": 0.3211870789527893, \"value_count\": 16, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 448.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.20418775081634521, \"percentile_inc_nulls\": 0.3131796717643738, \"value_count\": 15, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 405.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.18590641021728516, \"percentile_inc_nulls\": 0.297402024269104, \"value_count\": 14, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 798.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.17339807748794556, \"percentile_inc_nulls\": 0.2866068482398987, \"value_count\": 13, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 546.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.15635383129119873, \"percentile_inc_nulls\": 0.2718968987464905, \"value_count\": 12, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 744.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.14274585247039795, \"percentile_inc_nulls\": 0.2601526379585266, \"value_count\": 11, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 594.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.12739688158035278, \"percentile_inc_nulls\": 0.24690574407577515, \"value_count\": 10, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 670.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.114407479763031, \"percentile_inc_nulls\": 0.23569536209106445, \"value_count\": 9, \"group_name\": \"birth_place\", 
\"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 567.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.09846282005310059, \"percentile_inc_nulls\": 0.22193443775177002, \"value_count\": 8, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 696.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.08531302213668823, \"percentile_inc_nulls\": 0.21058565378189087, \"value_count\": 7, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 574.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.07129275798797607, \"percentile_inc_nulls\": 0.19848549365997314, \"value_count\": 6, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 612.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.05594372749328613, \"percentile_inc_nulls\": 0.18523865938186646, \"value_count\": 5, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 670.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.03862452507019043, \"percentile_inc_nulls\": 0.17029142379760742, \"value_count\": 4, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 756.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.021717727184295654, \"percentile_inc_nulls\": 0.15570008754730225, \"value_count\": 3, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 738.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.008063971996307373, \"percentile_inc_nulls\": 0.1439163088798523, \"value_count\": 2, 
\"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 596.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.13695675134658813, \"value_count\": 1, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 352.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6836498975753784, \"percentile_inc_nulls\": 0.7269761562347412, \"value_count\": 119, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 238.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6809465885162354, \"percentile_inc_nulls\": 0.7246431112289429, \"value_count\": 118, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 118.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6782662868499756, \"percentile_inc_nulls\": 0.72232985496521, \"value_count\": 117, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 117.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6756088137626648, \"percentile_inc_nulls\": 0.7200363874435425, \"value_count\": 116, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 116.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6678426265716553, \"percentile_inc_nulls\": 0.7133338451385498, \"value_count\": 113, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 339.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6601452827453613, \"percentile_inc_nulls\": 0.7066906690597534, \"value_count\": 
112, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 336.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6551511287689209, \"percentile_inc_nulls\": 0.7023804783821106, \"value_count\": 109, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 218.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.650202751159668, \"percentile_inc_nulls\": 0.6981098651885986, \"value_count\": 108, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 216.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6453460454940796, \"percentile_inc_nulls\": 0.6939183473587036, \"value_count\": 106, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 212.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6406267881393433, \"percentile_inc_nulls\": 0.6898453831672668, \"value_count\": 103, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 206.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6359533667564392, \"percentile_inc_nulls\": 0.6858119964599609, \"value_count\": 102, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 204.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6336395740509033, \"percentile_inc_nulls\": 0.6838151216506958, \"value_count\": 101, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 101.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.629057765007019, \"percentile_inc_nulls\": 
0.6798608303070068, \"value_count\": 100, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 200.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6245217323303223, \"percentile_inc_nulls\": 0.675946056842804, \"value_count\": 99, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 198.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.613296389579773, \"percentile_inc_nulls\": 0.6662580966949463, \"value_count\": 98, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 490.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6066298484802246, \"percentile_inc_nulls\": 0.6605045795440674, \"value_count\": 97, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 291.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6044305562973022, \"percentile_inc_nulls\": 0.6586065292358398, \"value_count\": 96, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 96.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.6022771596908569, \"percentile_inc_nulls\": 0.6567479968070984, \"value_count\": 94, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 94.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.598016083240509, \"percentile_inc_nulls\": 0.6530705094337463, \"value_count\": 93, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 186.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5938008427619934, 
\"percentile_inc_nulls\": 0.6494325399398804, \"value_count\": 92, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 184.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5917619466781616, \"percentile_inc_nulls\": 0.6476728916168213, \"value_count\": 89, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 89.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5857139825820923, \"percentile_inc_nulls\": 0.6424532532691956, \"value_count\": 88, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 264.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.577924907207489, \"percentile_inc_nulls\": 0.6357309818267822, \"value_count\": 85, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 340.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5741220116615295, \"percentile_inc_nulls\": 0.6324489116668701, \"value_count\": 83, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 166.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.562850832939148, \"percentile_inc_nulls\": 0.6227213144302368, \"value_count\": 82, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 492.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.557283878326416, \"percentile_inc_nulls\": 0.6179168820381165, \"value_count\": 81, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 243.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 
0.5464250445365906, \"percentile_inc_nulls\": 0.608545184135437, \"value_count\": 79, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 474.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5392774343490601, \"percentile_inc_nulls\": 0.6023765206336975, \"value_count\": 78, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 312.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5304574966430664, \"percentile_inc_nulls\": 0.5947645306587219, \"value_count\": 77, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 385.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5234931707382202, \"percentile_inc_nulls\": 0.5887539982795715, \"value_count\": 76, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 304.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5150167942047119, \"percentile_inc_nulls\": 0.5814385414123535, \"value_count\": 74, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 370.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5083274245262146, \"percentile_inc_nulls\": 0.575665295124054, \"value_count\": 73, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 292.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.5000801682472229, \"percentile_inc_nulls\": 0.5685476064682007, \"value_count\": 72, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 360.0, \"distinct_value_count\": 2373}, 
{\"percentile_ex_nulls\": 0.4919474720954895, \"percentile_inc_nulls\": 0.561528742313385, \"value_count\": 71, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 355.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4871366024017334, \"percentile_inc_nulls\": 0.5573767423629761, \"value_count\": 70, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 210.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4792330265045166, \"percentile_inc_nulls\": 0.5505555868148804, \"value_count\": 69, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 345.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4761173725128174, \"percentile_inc_nulls\": 0.547866702079773, \"value_count\": 68, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 136.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.47151267528533936, \"percentile_inc_nulls\": 0.5438926219940186, \"value_count\": 67, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 201.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4684886932373047, \"percentile_inc_nulls\": 0.5412827730178833, \"value_count\": 66, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4625323414802551, \"percentile_inc_nulls\": 0.5361422300338745, \"value_count\": 65, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 260.0, 
\"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4566676616668701, \"percentile_inc_nulls\": 0.5310807228088379, \"value_count\": 64, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 256.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4537811279296875, \"percentile_inc_nulls\": 0.5285894870758057, \"value_count\": 63, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 126.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.44667935371398926, \"percentile_inc_nulls\": 0.5224603414535522, \"value_count\": 62, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 310.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.4438844323158264, \"percentile_inc_nulls\": 0.5200482606887817, \"value_count\": 61, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 122.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.43976080417633057, \"percentile_inc_nulls\": 0.5164893865585327, \"value_count\": 60, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.434354305267334, \"percentile_inc_nulls\": 0.511823296546936, \"value_count\": 59, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 236.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 0.43169689178466797, \"percentile_inc_nulls\": 0.5095298290252686, \"value_count\": 58, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 116.0, \"distinct_value_count\": 2373}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 2322, \"group_name\": \"birth_place\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2322.0, \"distinct_value_count\": 2373}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column birth_place\", \"subtitle\": \"In this col, 6,927 values (13.7%) are null and there are 2373 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 2322, \"group_name\": \"birth_place\", \"value\": \"london\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 759, \"group_name\": \"birth_place\", \"value\": \"westminster\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 568, \"group_name\": \"birth_place\", \"value\": \"birmingham\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 448, \"group_name\": \"birth_place\", \"value\": \"manchester\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 440, \"group_name\": \"birth_place\", \"value\": \"liverpool\", \"total_non_null_rows\": 43651, 
\"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 413, \"group_name\": \"birth_place\", \"value\": \"edinburgh\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 347, \"group_name\": \"birth_place\", \"value\": \"leeds\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 319, \"group_name\": \"birth_place\", \"value\": \"camden\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 316, \"group_name\": \"birth_place\", \"value\": \"sheffield\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 288, \"group_name\": \"birth_place\", \"value\": \"glasgow\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 10 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"drayton bassett\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"butetown\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"cinderford\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 
50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"peterchurch\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"hailsham\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"malpas\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"seneley green\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"st. dogmaels\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"bold\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}, {\"value_count\": 1, \"group_name\": \"birth_place\", \"value\": \"chaddesley corbett\", \"total_non_null_rows\": 43651, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 2373}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 2322]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 10 values by value count\"}]}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\"}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.VConcatChart(...)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "profile_columns(df, column_expressions=\"birth_place\", db_api=DuckDBAPI())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we can see that \"london\" is the most common value, with many multiples more entires than the other values. In this case two records both having a `birth_place` of \"london\" gives far less evidence for a match than both having a rarer `birth_place` (e.g. \"felthorpe\").\n",
    "\n",
    "To take this skew into account, we can build Splink models with **Term Frequency Adjustments**. These adjustments will increase the amount of evidence for rare matching values and reduce the amount of evidence for common matching values.\n",
    "\n",
    "To understand how these work in more detail, check out the [Term Frequency Adjustments Topic Guide](../topic_guides/comparisons/term-frequency.md)\n",
    "\n",
    "<hr>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Skewed Date Columns\n",
    "\n",
    "Dates can also be skewed, but tend to be dealt with slightly differently.\n",
    "\n",
    "Consider the `dob` column from our example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-00f04bf5724c40529732a0cf201a8e85.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-00f04bf5724c40529732a0cf201a8e85.vega-embed details,\n",
       "  #altair-viz-00f04bf5724c40529732a0cf201a8e85.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-00f04bf5724c40529732a0cf201a8e85\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-00f04bf5724c40529732a0cf201a8e85\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-00f04bf5724c40529732a0cf201a8e85\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"vconcat\": [{\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.7075941562652588, \"percentile_inc_nulls\": 0.7735181450843811, \"value_count\": 27, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 81.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7056030631065369, \"percentile_inc_nulls\": 0.7719759941101074, \"value_count\": 26, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 78.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7030503749847412, \"percentile_inc_nulls\": 0.7699987888336182, \"value_count\": 25, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 100.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6993745565414429, \"percentile_inc_nulls\": 0.7671517133712769, \"value_count\": 24, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6964390277862549, \"percentile_inc_nulls\": 0.7648780345916748, \"value_count\": 23, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 115.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6913847923278809, \"percentile_inc_nulls\": 0.7609632611274719, \"value_count\": 22, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 198.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6806637048721313, \"percentile_inc_nulls\": 0.752659261226654, \"value_count\": 
21, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 420.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.676068902015686, \"percentile_inc_nulls\": 0.749100387096405, \"value_count\": 20, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6649138927459717, \"percentile_inc_nulls\": 0.7404602766036987, \"value_count\": 19, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 437.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6571027040481567, \"percentile_inc_nulls\": 0.7344102263450623, \"value_count\": 18, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 306.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6462540030479431, \"percentile_inc_nulls\": 0.726007342338562, \"value_count\": 17, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 425.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6368602514266968, \"percentile_inc_nulls\": 0.7187314629554749, \"value_count\": 16, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 368.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6226930618286133, \"percentile_inc_nulls\": 0.7077583074569702, \"value_count\": 15, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 555.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.6012507677078247, \"percentile_inc_nulls\": 0.6911503076553345, \"value_count\": 14, \"group_name\": \"dob\", 
\"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 840.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.5826675295829773, \"percentile_inc_nulls\": 0.6767566800117493, \"value_count\": 13, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 728.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.549891471862793, \"percentile_inc_nulls\": 0.6513701677322388, \"value_count\": 12, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1284.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.5184428691864014, \"percentile_inc_nulls\": 0.627011775970459, \"value_count\": 11, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1232.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.4778558015823364, \"percentile_inc_nulls\": 0.5955751538276672, \"value_count\": 10, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1590.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.4351244568824768, \"percentile_inc_nulls\": 0.5624777674674988, \"value_count\": 9, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1674.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.3899936079978943, \"percentile_inc_nulls\": 0.5275218486785889, \"value_count\": 8, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1768.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.34603703022003174, \"percentile_inc_nulls\": 0.49347543716430664, \"value_count\": 7, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1722.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.30437779426574707, \"percentile_inc_nulls\": 0.46120840311050415, \"value_count\": 6, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1632.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.26340776681900024, \"percentile_inc_nulls\": 0.42947524785995483, \"value_count\": 5, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1605.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.22430121898651123, \"percentile_inc_nulls\": 0.3991854190826416, \"value_count\": 4, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1532.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.18179959058761597, \"percentile_inc_nulls\": 0.3662659525871277, \"value_count\": 3, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 1665.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.12528395652770996, \"percentile_inc_nulls\": 0.32249200344085693, \"value_count\": 2, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 2214.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.22545373439788818, \"value_count\": 1, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 4908.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7847351431846619, \"percentile_inc_nulls\": 0.8332674503326416, \"value_count\": 72, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7829228043556213, \"percentile_inc_nulls\": 0.8318636417388916, \"value_count\": 71, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 71.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7811869978904724, \"percentile_inc_nulls\": 0.8305191993713379, \"value_count\": 68, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 68.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7778174877166748, \"percentile_inc_nulls\": 0.8279093503952026, \"value_count\": 66, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 132.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7762093544006348, \"percentile_inc_nulls\": 0.8266637325286865, \"value_count\": 63, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 63.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7746266722679138, \"percentile_inc_nulls\": 0.8254379034042358, \"value_count\": 62, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 62.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7730695605278015, \"percentile_inc_nulls\": 0.8242318630218506, \"value_count\": 61, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 61.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.768474817276001, \"percentile_inc_nulls\": 0.8206729888916016, \"value_count\": 60, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, 
\"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7669942378997803, \"percentile_inc_nulls\": 0.8195262551307678, \"value_count\": 58, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 58.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7641863226890564, \"percentile_inc_nulls\": 0.8173514008522034, \"value_count\": 55, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 110.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7628079056739807, \"percentile_inc_nulls\": 0.8162837624549866, \"value_count\": 54, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 54.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7614805698394775, \"percentile_inc_nulls\": 0.8152556419372559, \"value_count\": 52, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 52.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7576515674591064, \"percentile_inc_nulls\": 0.8122899532318115, \"value_count\": 50, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 150.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7564007639884949, \"percentile_inc_nulls\": 0.8113211393356323, \"value_count\": 49, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 49.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7551754713058472, \"percentile_inc_nulls\": 0.8103721141815186, \"value_count\": 48, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 48.0, \"distinct_value_count\": 8985}, 
{\"percentile_ex_nulls\": 0.7539757490158081, \"percentile_inc_nulls\": 0.8094428777694702, \"value_count\": 47, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 47.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7504531145095825, \"percentile_inc_nulls\": 0.8067144155502319, \"value_count\": 46, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7482067346572876, \"percentile_inc_nulls\": 0.8049744963645935, \"value_count\": 44, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 88.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7449138164520264, \"percentile_inc_nulls\": 0.8024239540100098, \"value_count\": 43, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 129.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7438417673110962, \"percentile_inc_nulls\": 0.801593542098999, \"value_count\": 42, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 42.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.741748571395874, \"percentile_inc_nulls\": 0.7999722957611084, \"value_count\": 41, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 82.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7397064566612244, \"percentile_inc_nulls\": 0.7983906269073486, \"value_count\": 40, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 80.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7377153635025024, 
\"percentile_inc_nulls\": 0.7968484163284302, \"value_count\": 39, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 78.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7367453575134277, \"percentile_inc_nulls\": 0.7960970997810364, \"value_count\": 38, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 38.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7358008623123169, \"percentile_inc_nulls\": 0.795365571975708, \"value_count\": 37, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 37.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7321250438690186, \"percentile_inc_nulls\": 0.7925184965133667, \"value_count\": 36, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7294448018074036, \"percentile_inc_nulls\": 0.7904424667358398, \"value_count\": 35, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 105.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7242373824119568, \"percentile_inc_nulls\": 0.7864091396331787, \"value_count\": 34, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 204.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7225526571273804, \"percentile_inc_nulls\": 0.7851042151451111, \"value_count\": 33, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 66.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7217358350753784, \"percentile_inc_nulls\": 
0.7844715118408203, \"value_count\": 32, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 32.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7185705304145813, \"percentile_inc_nulls\": 0.782019853591919, \"value_count\": 31, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 124.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7155073285102844, \"percentile_inc_nulls\": 0.7796472907066345, \"value_count\": 30, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 120.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7118059992790222, \"percentile_inc_nulls\": 0.7767804265022278, \"value_count\": 29, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 145.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7096617817878723, \"percentile_inc_nulls\": 0.7751196026802063, \"value_count\": 28, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 84.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9847862124443054, \"percentile_inc_nulls\": 0.98821622133255, \"value_count\": 596, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 596.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9702361226081848, \"percentile_inc_nulls\": 0.9769464731216431, \"value_count\": 570, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 570.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9574728608131409, \"percentile_inc_nulls\": 0.9670608043670654, \"value_count\": 500, 
\"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 500.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9451180696487427, \"percentile_inc_nulls\": 0.957491397857666, \"value_count\": 484, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 484.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9329419136047363, \"percentile_inc_nulls\": 0.9480603933334351, \"value_count\": 477, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 477.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9225781559944153, \"percentile_inc_nulls\": 0.9400331974029541, \"value_count\": 406, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 406.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9122399687767029, \"percentile_inc_nulls\": 0.9320257902145386, \"value_count\": 405, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 405.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.9027185440063477, \"percentile_inc_nulls\": 0.924651026725769, \"value_count\": 373, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 373.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8937842845916748, \"percentile_inc_nulls\": 0.9177310466766357, \"value_count\": 350, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 350.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8850797414779663, \"percentile_inc_nulls\": 0.910988986492157, \"value_count\": 341, \"group_name\": \"dob\", 
\"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 341.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.876451849937439, \"percentile_inc_nulls\": 0.9043062329292297, \"value_count\": 338, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 338.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8680791258811951, \"percentile_inc_nulls\": 0.8978211879730225, \"value_count\": 328, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 328.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8617995977401733, \"percentile_inc_nulls\": 0.8929573893547058, \"value_count\": 246, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 246.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8560306429862976, \"percentile_inc_nulls\": 0.8884890675544739, \"value_count\": 226, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 226.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.850542426109314, \"percentile_inc_nulls\": 0.8842381834983826, \"value_count\": 215, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 215.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8452329635620117, \"percentile_inc_nulls\": 0.8801257610321045, \"value_count\": 208, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 208.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8408934473991394, \"percentile_inc_nulls\": 0.8767645955085754, \"value_count\": 170, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 170.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8368091583251953, \"percentile_inc_nulls\": 0.8736011981964111, \"value_count\": 160, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 160.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8329547047615051, \"percentile_inc_nulls\": 0.8706156611442566, \"value_count\": 151, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 151.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8294064998626709, \"percentile_inc_nulls\": 0.8678674697875977, \"value_count\": 139, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 139.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8262922763824463, \"percentile_inc_nulls\": 0.8654553294181824, \"value_count\": 122, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 122.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8232546448707581, \"percentile_inc_nulls\": 0.8631025552749634, \"value_count\": 119, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 119.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8202425241470337, \"percentile_inc_nulls\": 0.860769510269165, \"value_count\": 118, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 118.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8173069357872009, \"percentile_inc_nulls\": 0.8584957718849182, \"value_count\": 115, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 115.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8143969178199768, \"percentile_inc_nulls\": 0.8562418222427368, \"value_count\": 114, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 114.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8088321685791016, \"percentile_inc_nulls\": 0.8519316911697388, \"value_count\": 109, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 218.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8060753345489502, \"percentile_inc_nulls\": 0.8497963547706604, \"value_count\": 108, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 108.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8034460544586182, \"percentile_inc_nulls\": 0.8477599024772644, \"value_count\": 103, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 103.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.8009700179100037, \"percentile_inc_nulls\": 0.8458420634269714, \"value_count\": 97, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 97.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7986981272697449, \"percentile_inc_nulls\": 0.8440824151039124, \"value_count\": 89, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 89.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7964773178100586, \"percentile_inc_nulls\": 0.8423622846603394, \"value_count\": 87, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 
87.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.794282078742981, \"percentile_inc_nulls\": 0.8406619429588318, \"value_count\": 86, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 86.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7921888828277588, \"percentile_inc_nulls\": 0.8390406966209412, \"value_count\": 82, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 82.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 0.7884109616279602, \"percentile_inc_nulls\": 0.8361145257949829, \"value_count\": 74, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 148.0, \"distinct_value_count\": 8985}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 27, \"group_name\": \"dob\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 81.0, \"distinct_value_count\": 8985}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column dob\", \"subtitle\": \"In this col, 11,403 values (22.5%) are null and there are 8985 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 596, \"group_name\": \"dob\", \"value\": \"1862-01-01\", 
\"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 570, \"group_name\": \"dob\", \"value\": \"1860-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 500, \"group_name\": \"dob\", \"value\": \"1861-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 484, \"group_name\": \"dob\", \"value\": \"1850-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 477, \"group_name\": \"dob\", \"value\": \"1858-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 406, \"group_name\": \"dob\", \"value\": \"1859-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 405, \"group_name\": \"dob\", \"value\": \"1857-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 373, \"group_name\": \"dob\", \"value\": \"1851-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 350, \"group_name\": \"dob\", \"value\": \"1855-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 341, \"group_name\": \"dob\", \"value\": \"1854-01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": 
\"nominal\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 10 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1777-89-09\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1834-70-14\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1865-08-12\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1721-07-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1845-71-19\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1702-02-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1844-12-07\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1810-11-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1881-07-16\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}, {\"value_count\": 1, \"group_name\": \"dob\", \"value\": \"1608-02-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 8985}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": 
\"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 596]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 10 values by value count\"}]}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\"}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.VConcatChart(...)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "profile_columns(df, column_expressions=\"dob\", db_api=DuckDBAPI())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we can see a large skew towards dates which are the 1st January. We can narrow down the profiling to show the distribution of month and day to explore this further:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-9c90bf1dbb954fddadfac8cc02ad4fd7.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-9c90bf1dbb954fddadfac8cc02ad4fd7.vega-embed details,\n",
       "  #altair-viz-9c90bf1dbb954fddadfac8cc02ad4fd7.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-9c90bf1dbb954fddadfac8cc02ad4fd7\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-9c90bf1dbb954fddadfac8cc02ad4fd7\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-9c90bf1dbb954fddadfac8cc02ad4fd7\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.17.0?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.17.0\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"vconcat\": [{\"hconcat\": [{\"mark\": {\"type\": \"line\", \"interpolate\": \"step-after\"}, \"data\": {\"values\": [{\"percentile_ex_nulls\": 0.6327760219573975, \"percentile_inc_nulls\": 0.7155680656433105, \"value_count\": 14386, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 14386.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.6230248808860779, \"percentile_inc_nulls\": 0.7080153226852417, \"value_count\": 382, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 382.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.6139630079269409, \"percentile_inc_nulls\": 0.7009965181350708, \"value_count\": 355, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 355.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.604926586151123, \"percentile_inc_nulls\": 0.6939973831176758, \"value_count\": 354, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 354.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5961965322494507, \"percentile_inc_nulls\": 0.6872355937957764, \"value_count\": 342, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 342.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.590172290802002, \"percentile_inc_nulls\": 0.6825695037841797, \"value_count\": 236, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 236.0, \"distinct_value_count\": 1340}, 
{\"percentile_ex_nulls\": 0.5845819711685181, \"percentile_inc_nulls\": 0.6782395839691162, \"value_count\": 219, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 219.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5792725086212158, \"percentile_inc_nulls\": 0.6741271018981934, \"value_count\": 208, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 208.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5740395784378052, \"percentile_inc_nulls\": 0.6700739860534668, \"value_count\": 205, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 205.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5688576698303223, \"percentile_inc_nulls\": 0.6660603284835815, \"value_count\": 203, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 203.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5641353130340576, \"percentile_inc_nulls\": 0.6624026298522949, \"value_count\": 185, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 185.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5595149993896484, \"percentile_inc_nulls\": 0.6588239669799805, \"value_count\": 181, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 181.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5555583834648132, \"percentile_inc_nulls\": 0.6557594537734985, \"value_count\": 155, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 155.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5519336462020874, \"percentile_inc_nulls\": 0.6529518365859985, \"value_count\": 142, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 142.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5451436042785645, \"percentile_inc_nulls\": 0.6476926803588867, \"value_count\": 133, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 266.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.542003870010376, \"percentile_inc_nulls\": 0.6452608108520508, \"value_count\": 123, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 123.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5389406681060791, \"percentile_inc_nulls\": 0.6428881883621216, \"value_count\": 120, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 120.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5360561609268188, \"percentile_inc_nulls\": 0.6406540274620056, \"value_count\": 113, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 113.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.533248245716095, \"percentile_inc_nulls\": 0.6384791731834412, \"value_count\": 110, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 110.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5277345180511475, \"percentile_inc_nulls\": 0.6342085599899292, \"value_count\": 108, \"group_name\": \"substr_dob_6_10_\", 
\"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 216.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.52500319480896, \"percentile_inc_nulls\": 0.6320930123329163, \"value_count\": 107, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 107.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5197447538375854, \"percentile_inc_nulls\": 0.6280200481414795, \"value_count\": 103, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 206.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5145883560180664, \"percentile_inc_nulls\": 0.6240262985229492, \"value_count\": 101, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 202.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5120357275009155, \"percentile_inc_nulls\": 0.62204909324646, \"value_count\": 100, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 100.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5069814920425415, \"percentile_inc_nulls\": 0.6181343793869019, \"value_count\": 99, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 198.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.5044798851013184, \"percentile_inc_nulls\": 0.6161967515945435, \"value_count\": 98, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 98.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.4995788335800171, \"percentile_inc_nulls\": 0.6124006509780884, 
\"value_count\": 96, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 192.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.4972303509712219, \"percentile_inc_nulls\": 0.610581636428833, \"value_count\": 92, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 92.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.49258458614349365, \"percentile_inc_nulls\": 0.6069833040237427, \"value_count\": 91, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 182.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.48804086446762085, \"percentile_inc_nulls\": 0.6034639477729797, \"value_count\": 89, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 178.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.48130184412002563, \"percentile_inc_nulls\": 0.598244309425354, \"value_count\": 88, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 264.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.4746394157409668, \"percentile_inc_nulls\": 0.5930839776992798, \"value_count\": 87, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 261.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.47029995918273926, \"percentile_inc_nulls\": 0.5897228121757507, \"value_count\": 85, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 170.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 
0.4638672471046448, \"percentile_inc_nulls\": 0.584740400314331, \"value_count\": 84, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 252.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.46174854040145874, \"percentile_inc_nulls\": 0.583099365234375, \"value_count\": 83, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 83.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.45128268003463745, \"percentile_inc_nulls\": 0.5749930739402771, \"value_count\": 82, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 410.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.013273775577545166, \"percentile_inc_nulls\": 0.23573487997055054, \"value_count\": 2, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 512.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.0, \"percentile_inc_nulls\": 0.22545373439788818, \"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 520.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.13100188970565796, \"percentile_inc_nulls\": 0.3269208073616028, \"value_count\": 44, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 396.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.1266113519668579, \"percentile_inc_nulls\": 0.32352012395858765, \"value_count\": 43, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 172.0, 
\"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.11481809616088867, \"percentile_inc_nulls\": 0.31438571214675903, \"value_count\": 42, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 462.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.10853862762451172, \"percentile_inc_nulls\": 0.3095219135284424, \"value_count\": 41, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 246.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.10037010908126831, \"percentile_inc_nulls\": 0.303195059299469, \"value_count\": 40, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 320.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.09638798236846924, \"percentile_inc_nulls\": 0.30011069774627686, \"value_count\": 39, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 156.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.09444797039031982, \"percentile_inc_nulls\": 0.29860806465148926, \"value_count\": 38, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 76.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.09067004919052124, \"percentile_inc_nulls\": 0.295681893825531, \"value_count\": 37, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 148.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.0860753059387207, \"percentile_inc_nulls\": 0.29212307929992676, \"value_count\": 36, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 180.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.08071476221084595, \"percentile_inc_nulls\": 0.2879710793495178, \"value_count\": 35, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 210.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.07637524604797363, \"percentile_inc_nulls\": 0.28460991382598877, \"value_count\": 34, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 170.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.07384812831878662, \"percentile_inc_nulls\": 0.2826525568962097, \"value_count\": 33, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 99.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.07221442461013794, \"percentile_inc_nulls\": 0.2813871502876282, \"value_count\": 32, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 64.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.07142311334609985, \"percentile_inc_nulls\": 0.28077423572540283, \"value_count\": 31, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 31.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.07065731287002563, \"percentile_inc_nulls\": 0.2801811099052429, \"value_count\": 30, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 30.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.06621569395065308, \"percentile_inc_nulls\": 0.2767408490180969, \"value_count\": 29, 
\"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 174.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.06264197826385498, \"percentile_inc_nulls\": 0.27397286891937256, \"value_count\": 28, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 140.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.0612635612487793, \"percentile_inc_nulls\": 0.27290523052215576, \"value_count\": 27, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 54.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.057281434535980225, \"percentile_inc_nulls\": 0.2698208689689636, \"value_count\": 26, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 156.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.05409061908721924, \"percentile_inc_nulls\": 0.2673494219779968, \"value_count\": 25, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 125.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.05347800254821777, \"percentile_inc_nulls\": 0.26687490940093994, \"value_count\": 24, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 24.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.05112951993942261, \"percentile_inc_nulls\": 0.26505595445632935, \"value_count\": 23, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 92.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04888319969177246, 
\"percentile_inc_nulls\": 0.2633160948753357, \"value_count\": 22, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 88.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04781109094619751, \"percentile_inc_nulls\": 0.26248568296432495, \"value_count\": 21, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 42.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04627948999404907, \"percentile_inc_nulls\": 0.26129937171936035, \"value_count\": 20, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 60.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04582005739212036, \"percentile_inc_nulls\": 0.2609434723854065, \"value_count\": 18, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 18.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04505425691604614, \"percentile_inc_nulls\": 0.2603503465652466, \"value_count\": 15, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 30.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.0446968674659729, \"percentile_inc_nulls\": 0.26007354259490967, \"value_count\": 14, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 14.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04439055919647217, \"percentile_inc_nulls\": 0.2598363161087036, \"value_count\": 12, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 12.0, \"distinct_value_count\": 
1340}, {\"percentile_ex_nulls\": 0.0441097617149353, \"percentile_inc_nulls\": 0.2596188187599182, \"value_count\": 11, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 11.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.04388004541397095, \"percentile_inc_nulls\": 0.2594408392906189, \"value_count\": 9, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 9.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.043701350688934326, \"percentile_inc_nulls\": 0.25930243730545044, \"value_count\": 7, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 7.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.042629241943359375, \"percentile_inc_nulls\": 0.2584720849990845, \"value_count\": 6, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 42.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.03994894027709961, \"percentile_inc_nulls\": 0.2563960552215576, \"value_count\": 5, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 105.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.035456299781799316, \"percentile_inc_nulls\": 0.25291627645492554, \"value_count\": 4, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 176.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.026343345642089844, \"percentile_inc_nulls\": 0.24585789442062378, \"value_count\": 3, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, 
\"sum_tokens_in_value_count_group\": 357.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.4430121183395386, \"percentile_inc_nulls\": 0.5685871243476868, \"value_count\": 81, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 324.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.4409700036048889, \"percentile_inc_nulls\": 0.5670053958892822, \"value_count\": 80, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 80.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.43088704347610474, \"percentile_inc_nulls\": 0.5591956973075867, \"value_count\": 79, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 395.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.42491382360458374, \"percentile_inc_nulls\": 0.5545691847801208, \"value_count\": 78, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 234.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.41705167293548584, \"percentile_inc_nulls\": 0.5484795570373535, \"value_count\": 77, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 308.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.411308228969574, \"percentile_inc_nulls\": 0.544031023979187, \"value_count\": 75, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 225.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.4037523865699768, \"percentile_inc_nulls\": 0.5381786823272705, \"value_count\": 74, \"group_name\": \"substr_dob_6_10_\", 
\"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 296.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.3962986469268799, \"percentile_inc_nulls\": 0.5324053764343262, \"value_count\": 73, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 292.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.39262282848358154, \"percentile_inc_nulls\": 0.5295583009719849, \"value_count\": 72, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 144.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.3871856927871704, \"percentile_inc_nulls\": 0.5253469944000244, \"value_count\": 71, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 213.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.37467771768569946, \"percentile_inc_nulls\": 0.515658974647522, \"value_count\": 70, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 490.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.36410975456237793, \"percentile_inc_nulls\": 0.5074735879898071, \"value_count\": 69, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 414.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.3519591689109802, \"percentile_inc_nulls\": 0.4980623722076416, \"value_count\": 68, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 476.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.34511804580688477, \"percentile_inc_nulls\": 0.4927636384963989, 
\"value_count\": 67, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 268.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.34006381034851074, \"percentile_inc_nulls\": 0.4888489246368408, \"value_count\": 66, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 198.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.331767737865448, \"percentile_inc_nulls\": 0.48242318630218506, \"value_count\": 65, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 325.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.3186981678009033, \"percentile_inc_nulls\": 0.4723002314567566, \"value_count\": 64, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 512.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.31226545572280884, \"percentile_inc_nulls\": 0.4673178195953369, \"value_count\": 63, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 252.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.3011869788169861, \"percentile_inc_nulls\": 0.45873701572418213, \"value_count\": 62, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 434.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.29495853185653687, \"percentile_inc_nulls\": 0.45391279458999634, \"value_count\": 61, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 244.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 
0.27198469638824463, \"percentile_inc_nulls\": 0.436118483543396, \"value_count\": 60, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 900.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.26445436477661133, \"percentile_inc_nulls\": 0.43028587102890015, \"value_count\": 59, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 295.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.24964898824691772, \"percentile_inc_nulls\": 0.41881847381591797, \"value_count\": 58, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 580.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.24091893434524536, \"percentile_inc_nulls\": 0.4120566248893738, \"value_count\": 57, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 342.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.22662413120269775, \"percentile_inc_nulls\": 0.40098464488983154, \"value_count\": 56, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 560.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.21820038557052612, \"percentile_inc_nulls\": 0.3944600224494934, \"value_count\": 55, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 330.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.21130824089050293, \"percentile_inc_nulls\": 0.38912177085876465, \"value_count\": 54, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 
270.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.20183789730072021, \"percentile_inc_nulls\": 0.3817865252494812, \"value_count\": 53, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 371.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.19121891260147095, \"percentile_inc_nulls\": 0.3735616207122803, \"value_count\": 52, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 416.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.1847096085548401, \"percentile_inc_nulls\": 0.3685199022293091, \"value_count\": 51, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 255.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.17577534914016724, \"percentile_inc_nulls\": 0.3615999221801758, \"value_count\": 50, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 350.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.17077219486236572, \"percentile_inc_nulls\": 0.35772472620010376, \"value_count\": 49, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 196.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.16342055797576904, \"percentile_inc_nulls\": 0.35203051567077637, \"value_count\": 48, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 288.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.15382260084152222, \"percentile_inc_nulls\": 0.3445964455604553, \"value_count\": 47, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, 
\"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 376.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.15029990673065186, \"percentile_inc_nulls\": 0.34186798334121704, \"value_count\": 46, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 138.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 0.14111042022705078, \"percentile_inc_nulls\": 0.33475029468536377, \"value_count\": 45, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 360.0, \"distinct_value_count\": 1340}, {\"percentile_ex_nulls\": 1.0, \"percentile_inc_nulls\": 1.0, \"value_count\": 14386, \"group_name\": \"substr_dob_6_10_\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"sum_tokens_in_value_count_group\": 14386.0, \"distinct_value_count\": 1340}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"percentile_ex_nulls\", \"type\": \"quantitative\"}, {\"field\": \"percentile_inc_nulls\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"percentile_ex_nulls\", \"sort\": \"descending\", \"title\": \"Percentile\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"value_count\", \"title\": \"Count of values\", \"type\": \"quantitative\"}}, \"title\": {\"text\": \"Distribution of counts of values in column substr(dob, 6, 10)\", \"subtitle\": \"In this col, 11,403 values (22.5%) are null and there are 1340 distinct values\"}}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 14386, \"group_name\": \"substr_dob_6_10_\", \"value\": \"01-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 382, 
\"group_name\": \"substr_dob_6_10_\", \"value\": \"01-81\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 355, \"group_name\": \"substr_dob_6_10_\", \"value\": \"07-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 354, \"group_name\": \"substr_dob_6_10_\", \"value\": \"81-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 342, \"group_name\": \"substr_dob_6_10_\", \"value\": \"01-07\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 236, \"group_name\": \"substr_dob_6_10_\", \"value\": \"04-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 219, \"group_name\": \"substr_dob_6_10_\", \"value\": \"02-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 208, \"group_name\": \"substr_dob_6_10_\", \"value\": \"01-02\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 205, \"group_name\": \"substr_dob_6_10_\", \"value\": \"11-01\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 203, \"group_name\": \"substr_dob_6_10_\", \"value\": \"01-04\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}]}, \"encoding\": {\"tooltip\": [{\"field\": \"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": 
\"value_count\", \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Top 10 values by value count\"}, {\"mark\": \"bar\", \"data\": {\"values\": [{\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"87-18\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"08-45\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"08-72\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"09-48\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"11-59\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"89-06\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"00-25\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"86-18\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"86-16\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}, {\"value_count\": 1, \"group_name\": \"substr_dob_6_10_\", \"value\": \"22-03\", \"total_non_null_rows\": 39175, \"total_rows_inc_nulls\": 50578, \"distinct_value_count\": 1340}]}, \"encoding\": {\"tooltip\": [{\"field\": 
\"value\", \"type\": \"nominal\"}, {\"field\": \"value_count\", \"type\": \"quantitative\"}, {\"field\": \"total_non_null_rows\", \"type\": \"quantitative\"}, {\"field\": \"total_rows_inc_nulls\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"value\", \"sort\": \"-y\", \"title\": null, \"type\": \"nominal\"}, \"y\": {\"field\": \"value_count\", \"scale\": {\"domain\": [0, 14386]}, \"title\": \"Value count\", \"type\": \"quantitative\"}}, \"title\": \"Bottom 10 values by value count\"}]}], \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.9.3.json\"}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.VConcatChart(...)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "profile_columns(df, column_expressions=\"substr(dob, 6, 10)\", db_api=DuckDBAPI())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here we can see that over 35% of all non-null dates of birth in this dataset fall on the 1st of January. This is fairly common in manually entered datasets: when only the year of birth is known, people will generally enter the 1st of January of that year."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Low cardinality columns\n",
    "\n",
    "Unfortunately, there is not much that can be done to improve low cardinality data. Ultimately, such columns will provide some evidence of a match between records, but they need to be used in conjunction with more predictive, higher cardinality fields."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Worked Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide_output"
    ]
   },
   "outputs": [],
   "source": [
    "from splink import DuckDBAPI, splink_datasets\n",
    "from splink.exploratory import profile_columns\n",
    "\n",
    "# Load the `historical_50k` example dataset exposed by `splink_datasets`\n",
    "df = splink_datasets.historical_50k\n",
    "\n",
    "# Profile `df` using a DuckDB backend; no `column_expressions` are passed\n",
    "duckdb_api = DuckDBAPI()\n",
    "profile_columns(df, db_api=duckdb_api)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
